-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[LV] Provide utility routine to find uncounted exit recipes #152530
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LV] Provide utility routine to find uncounted exit recipes #152530
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Graham Hunter (huntergr-arm) ChangesSplitting out just the recipe finding code from #148626 into a utility function (along with the extra pattern matchers). Hopefully this makes reviewing a bit easier. Added a gtest, since this isn't actually used anywhere yet. Full diff: https://github.com/llvm/llvm-project/pull/152530.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 8818843a30625..d7f9763c4d0c8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -692,6 +692,37 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
}
+struct loop_invariant_vpvalue {
+ template <typename ITy> bool match(ITy *V) const {
+ VPValue *Val = dyn_cast<VPValue>(V);
+ return Val && Val->isDefinedOutsideLoopRegions();
+ }
+};
+
+inline loop_invariant_vpvalue m_LoopInvVPValue() {
+ return loop_invariant_vpvalue();
+}
+
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+m_AnyOf(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
+template <typename SubPattern_t> struct OneUse_match {
+ SubPattern_t SubPattern;
+
+ OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ return V->hasOneUse() && SubPattern.match(V);
+ }
+};
+
+template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
+ return SubPattern;
+}
+
} // namespace VPlanPatternMatch
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 14f20c65a7034..358c38f49405c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -138,3 +138,103 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
});
return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}
+
+std::optional<VPValue *> vputils::getRecipesForUncountedExit(
+ VPlan &Plan, SmallVectorImpl<VPRecipeBase *> &Recipes,
+ SmallVectorImpl<VPReplicateRecipe *> &GEPs) {
+ using namespace llvm::VPlanPatternMatch;
+ // Given a vplan like the following (just including the recipes contributing
+ // to loop control exiting here, not the actual work), we're looking to match
+ // the recipes contributing to the uncounted exit condition comparison
+ // (here, vp<%4>) back to the canonical induction for the vector body so that
+ // we can copy them to a preheader and rotate the address in the loop to the
+ // next vector iteration.
+ //
+ // VPlan ' for UF>=1' {
+ // Live-in vp<%0> = VF
+ // Live-in ir<64> = original trip-count
+ //
+ // entry:
+ // Successor(s): preheader, vector.ph
+ //
+ // vector.ph:
+ // Successor(s): vector loop
+ //
+ // <x1> vector loop: {
+ // vector.body:
+ // EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
+ // vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
+ // CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
+ // WIDEN ir<%ee.load> = load ir<%ee.addr>
+ // WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
+ // EMIT vp<%5> = any-of vp<%4>
+ // EMIT vp<%6> = add vp<%2>, vp<%0>
+ // EMIT vp<%7> = icmp eq vp<%6>, ir<64>
+ // EMIT vp<%8> = or vp<%5>, vp<%7>
+ // EMIT branch-on-cond vp<%8>
+ // No successors
+ // }
+ // Successor(s): middle.block
+ //
+ // middle.block:
+ // Successor(s): preheader
+ //
+ // preheader:
+ // No successors
+ // }
+
+ // Find the uncounted loop exit condition.
+ auto *Region = Plan.getVectorLoopRegion();
+ VPValue *UncountedCondition = nullptr;
+ if (!match(
+ Region->getExitingBasicBlock()->getTerminator(),
+ m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+ m_OneUse(m_AnyOf(m_VPValue(UncountedCondition))), m_VPValue())))))
+ return std::nullopt;
+
+ SmallVector<VPValue *, 4> Worklist;
+ bool LoadFound = false;
+ Worklist.push_back(UncountedCondition);
+ while (!Worklist.empty()) {
+ VPValue *V = Worklist.pop_back_val();
+
+ if (V->isDefinedOutsideLoopRegions())
+ continue;
+ if (V->getNumUsers() > 1)
+ return std::nullopt;
+
+ if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
+ if (Cmp->getOpcode() != Instruction::ICmp)
+ return std::nullopt;
+ Worklist.push_back(Cmp->getOperand(0));
+ Worklist.push_back(Cmp->getOperand(1));
+ Recipes.push_back(Cmp);
+ } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
+ if (!Load->isConsecutive() || Load->isMasked())
+ return std::nullopt;
+ Worklist.push_back(Load->getAddr());
+ Recipes.push_back(Load);
+ LoadFound = true;
+ } else if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(V)) {
+ Worklist.push_back(VecPtr->getOperand(0));
+ Recipes.push_back(VecPtr);
+ } else if (auto *GEP = dyn_cast<VPReplicateRecipe>(V)) {
+ if (GEP->getNumOperands() != 2)
+ return std::nullopt;
+ if (!match(GEP, m_GetElementPtr(
+ m_LoopInvVPValue(),
+ m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
+ m_SpecificInt(1),
+ m_Specific(&Plan.getVF())))))
+ return std::nullopt;
+ GEPs.push_back(GEP);
+ Recipes.push_back(GEP);
+ } else
+ return std::nullopt;
+ }
+
+ if (GEPs.empty() || !LoadFound)
+ return std::nullopt;
+
+ return UncountedCondition;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 8dcd57f1b3598..631d7aa8da9ee 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -97,6 +97,14 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
/// Returns the header block of the first, top-level loop, or null if none
/// exist.
VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
+
+/// Returns the VPValue representing the uncounted exit comparison if all the
+/// recipes needed to form the condition within the vector loop body were
+/// matched.
+std::optional<VPValue *>
+getRecipesForUncountedExit(VPlan &Plan,
+ SmallVectorImpl<VPRecipeBase *> &Recipes,
+ SmallVectorImpl<VPReplicateRecipe *> &GEPs);
} // namespace vputils
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 24f6d61512ef6..5fecbbdef4b5b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -148,6 +148,8 @@ class LLVM_ABI_FOR_TEST VPValue {
return Current != user_end();
}
+ bool hasOneUse() const { return getNumUsers() == 1; }
+
void replaceAllUsesWith(VPValue *New);
/// Go through the uses list for this VPValue and make each use point to \p
diff --git a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
index 53eeff28c185f..a7254922af007 100644
--- a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -14,5 +14,6 @@ add_llvm_unittest(VectorizeTests
VPlanHCFGTest.cpp
VPlanPatternMatchTest.cpp
VPlanSlpTest.cpp
+ VPlanUncountedExitTest.cpp
VPlanVerifierTest.cpp
)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
new file mode 100644
index 0000000000000..81ef67a0fb923
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
@@ -0,0 +1,99 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp -----===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/LoopVectorizationPlanner.h"
+#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanPatternMatch.h"
+#include "../lib/Transforms/Vectorize/VPlanUtils.h"
+#include "VPlanTestBase.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+namespace {
+class VPUncountedExitTest : public VPlanTestBase {};
+
+TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
+ // Create CFG skeleton.
+ VPlan &Plan = getPlan();
+ VPBasicBlock *ScalarPH = Plan.getEntry();
+ VPBasicBlock *Entry = Plan.createVPBasicBlock("entry");
+ Plan.setEntry(Entry);
+ VPBasicBlock *VectorPH = Plan.createVPBasicBlock("vector.ph");
+ VPBasicBlock *VecBody = Plan.createVPBasicBlock("vector.body");
+ VPRegionBlock *Region =
+ Plan.createVPRegionBlock(VecBody, VecBody, "vector loop");
+ VPBasicBlock *MiddleBlock = Plan.createVPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(Entry, ScalarPH);
+ VPBlockUtils::connectBlocks(Entry, VectorPH);
+ VPBlockUtils::connectBlocks(VectorPH, Region);
+ VPBlockUtils::connectBlocks(Region, MiddleBlock);
+ VPBlockUtils::connectBlocks(MiddleBlock, ScalarPH);
+
+ // Live-Ins
+ IntegerType *I64Ty = IntegerType::get(C, 64);
+ IntegerType *I32Ty = IntegerType::get(C, 32);
+ PointerType *PTy = PointerType::get(C, 0);
+ VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
+ VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
+ VPValue *VF = &Plan.getVF();
+ Plan.setTripCount(Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 64)));
+
+ // Populate vector.body with the recipes for exiting.
+ auto *IV = new VPCanonicalIVPHIRecipe(Zero, {});
+ VecBody->appendRecipe(IV);
+ VPBuilder Builder(VecBody, VecBody->getFirstNonPhi());
+ auto *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, IV, Inc,
+ VF, DebugLoc());
+
+ // Uncounted Exit; GEP -> Load -> Cmp
+ auto *DummyGEP = GetElementPtrInst::Create(I32Ty, Zero->getUnderlyingValue(),
+ {}, Twine("ee.addr"));
+ auto *GEP = new VPReplicateRecipe(DummyGEP, {Zero, Steps}, true, nullptr);
+ Builder.insert(GEP);
+ auto *DummyLoad =
+ new LoadInst(I32Ty, PoisonValue::get(PTy), "ee.load", false, Align(1));
+ VPValue *Load =
+ new VPWidenLoadRecipe(*DummyLoad, GEP, nullptr, true, false, {}, {});
+ Builder.insert(Load->getDefiningRecipe());
+ // Should really splat the zero, but we're not checking types here.
+ VPValue *Cmp = new VPWidenRecipe(Instruction::ICmp, {Load, Zero},
+ VPIRFlags(CmpInst::ICMP_EQ), {}, {});
+ Builder.insert(Cmp->getDefiningRecipe());
+ VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, Cmp);
+
+ // Counted Exit; Inc IV -> Cmp
+ VPValue *NextIV = Builder.createNaryOp(Instruction::Add, {IV, VF});
+ VPValue *Counted =
+ Builder.createICmp(CmpInst::ICMP_EQ, NextIV, Plan.getTripCount());
+
+ // Combine, and branch.
+ VPValue *Combined = Builder.createNaryOp(Instruction::Or, {AnyOf, Counted});
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
+
+ SmallVector<VPRecipeBase *, 8> Recipes;
+ SmallVector<VPReplicateRecipe *, 2> GEPs;
+
+ std::optional<VPValue *> UncountedCondition =
+ vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);
+ ASSERT_TRUE(UncountedCondition.has_value());
+ ASSERT_EQ(*UncountedCondition, Cmp);
+ ASSERT_EQ(GEPs.size(), 1ull);
+ ASSERT_EQ(GEPs[0], GEP);
+ ASSERT_EQ(Recipes.size(), 3ull);
+
+ delete DummyLoad;
+ delete DummyGEP;
+}
+
+} // namespace
+} // namespace llvm
|
/// Returns the VPValue representing the uncounted exit comparison if all the | ||
/// recipes needed to form the condition within the vector loop body were | ||
/// matched. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[...] if all the recipes needed to form the condition within the vector loop body were matched.
This isn't really saying much, do you mean to say:
Returns the VPValue representing the uncounted exit comparison for a restricted set of conditions that are considered safe to calculate for the next vector iteration.
? If so, then I'd rather you write that explicitly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not all the checks are carried out here, but I'll see if I can improve the wording a little.
if (V->getNumUsers() > 1) | ||
return std::nullopt; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the issue with having multiple users?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In theory, nothing, except that I'm trying to make the simplest version I can to start with to aid with reviewing, and I can then gradually increase what the code deals with.
In this case, having more than one user may mean the transform needs to create a PHI node for the value, since these nodes will be copied to the preheader and the versions inside the loop rotated to the next vector iteration.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense, you don't want the next vector iteration to have to recompute this again. Could you describe that in the comment? (possibly with a FIXME to add PHI nodes for these in the future)
if (V->getNumUsers() > 1) | ||
return std::nullopt; | ||
|
||
if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The LoadFound
and GEPS.empty()
checks below suggest that this may be better handled with a match
expression (or a series of) with some additional checks to see if the load is consecutive and isn't masked.
Maybe also add a comment saying that this only supports a very specific case, but that this can be generalised in the future to other expressions as long as they no side-effects? (or something along those lines)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I could make it just match the exact IR (as I did in #137774) if needed, though I wanted to show the basics of what the code will look like for matching more loops later. This effectively has the same limitations as the straightline code, but can be extended easily in the future.
I suppose I could build up a list of loads to check, then do a more exacting match there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would the LoadFound
and GEPS.empty()
always be required though? My preference would be to write the code without worrying about how we might want to write this in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The checks on that can be moved to the transform itself, which currently assumes that it's transforming a loop which has an uncounted condition based on at least one load that can be cloned into the preheader, and at least one GEP which must be adjusted.
Worklist.push_back(Cmp->getOperand(1)); | ||
Recipes.push_back(Cmp); | ||
} else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) { | ||
if (!Load->isConsecutive() || Load->isMasked()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why must the load be consecutive? (would strided not work as well?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Strided would, but gathers would not without first-faulting support, and there's no distinguishing between strided and gathers from the recipe alone.
Though the restrictions on the GEP effectively force this to be contiguous for now anyway.
@@ -148,6 +148,8 @@ class LLVM_ABI_FOR_TEST VPValue { | |||
return Current != user_end(); | |||
} | |||
|
|||
bool hasOneUse() const { return getNumUsers() == 1; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The name of the function doesn't match the code, i.e. one use != one user. I think it should either be:
bool hasOneUser() const { return getNumUsers() == 1; }
or
bool hasOneUse() const { return getNumUses() == 1; }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uses are not directly modeled in VPlan, afaict.
Instead, a User may be recorded multiple times for a given VPValue. See the function directly above this, hasMoreThanOneUniqueUser
, which looks through the list of Users seeing if there's one that's different from the first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @fhahn, I think this is pretty confusing. Can we rename the existing getNumUsers
to getNumUses
to more closely match the naming conventions used for IR values? If a recipe has duplicate input operands then the number of uses != 1, but the number of users may still be 1. If so, I'd like to put up a patch to tidy this up a little.
@@ -138,3 +138,103 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) { | |||
}); | |||
return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I); | |||
} | |||
|
|||
std::optional<VPValue *> vputils::getRecipesForUncountedExit( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this function only ever intended to be called before we handle the uncountable exit, i.e. before we've introduced the middle split block? If it's after then the vplan in the comments below probably needs updating to reference the middle.split block.
It might be good to clarify in the comments where you expect this function to be called, as it seems to require the vplan CFG to be in a particular format.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's intended to be called after the vplan has been created and the CFG simplified so there's only one exit. This is currently after the existing uncounted early exit transformation, but that is changed in the transform PR to not create the split.
We probably want to formalize the strategy as state in the VPlan so we know which parts of the transformations to apply.
if (!match( | ||
Region->getExitingBasicBlock()->getTerminator(), | ||
m_BranchOnCond(m_OneUse(m_c_BinaryOr( | ||
m_OneUse(m_AnyOf(m_VPValue(UncountedCondition))), m_VPValue()))))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it matter if this is a loop with an uncountable early exit that doesn't match this pattern? If it's being called as part of VPlanTransforms::handleEarlyExits then it should be fine, but later on I can imagine a VPlanTransform may optimise some of this code. I assume that returning std::nullopt
would mean it's game over for vectorisation of a loop with an uncountable early exit and a store?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we can't match the expected recipes here, then the transform (from #148626) will abandon the vplan and vectorization will not proceed.
It's the reason I can do this as a vplan transform instead of manually planting recipes as part of the initial vplan creation based on what LoopVectorizationLegality finds -- I did it the latter way in 2015 when I originally prototyped early exit autovec.
// EMIT branch-on-cond vp<%8> | ||
// No successors | ||
// } | ||
// Successor(s): middle.block |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The IR above suggests we've already called handleUncountableEarlyExit
and so this probably should be // Successor(s): middle.split
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The transform PR explicitly avoids creating the split, since we won't have finished all iterations before leaving the vector body.
return std::nullopt; | ||
Worklist.push_back(Load->getAddr()); | ||
Recipes.push_back(Load); | ||
LoadFound = true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the general case, LoadFound
could be set to true for a vplan where neither icmp input is a load, but you just happen to bump into a load in the loop before the early exit. Are you specifically interested in finding any load in the loop, or finding a specific load, i.e. the load that forms the input to the icmp? If it's the latter I think you need to check that the load forms the input to the icmp before setting the boolean to true.
I realise that some of the legality work you're doing requires something of the form:
%load = load i32, ptr %gep
%icmp = icmp i32 %load, 3
but I don't think you should assume that here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I only consider operands from each recipe; that's what the worklist is for. So I start with the input to AnyOf above, which happens to be an icmp in the loop I'm using for the prototype, confirm that it's valid then add the two operands to the worklist.
In the case of the test, one of those operands is a live-in constant, so is accepted immediately. The other is the load, which is checked and the address recipe is added to the worklist.
I am moving the load checking to be more strict for now, as discussed with Sander above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, I was worried about perfectly valid recipes like this that you might encounter:
%icmp1 = icmp %load1, %load2
%icmp2 = icmp %load3, %load4
%icmp3 = icmp %icmp1, %icmp2
%any_of = any_of %icmp3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that should be fine. As long as all the recipes contributing to the exit condition can be found and tracked back to the canonical IV or a live-in I don't see a problem with us matching more recipes.
If you want me to restrict it to a single comparison right now I can do so, but I wanted to show the basis of the transform approach -- using a worklist to handle vplan recipes instead of using IR directly during initial plan construction.
Splitting out just the recipe finding code from #148626 into a utility function (along with the extra pattern matchers). Hopefully this makes reviewing a bit easier.
Added a gtest, since this isn't actually used anywhere yet.